Night Owl 6

home *** CD-ROM | disk | FTP | other *** search

/ Night Owl 6 / Night Owl's Shareware - PDSI-006 - Night Owl Corp (1990).iso / 031a / winlava.zip / SQRT.ASM < prev

Wrap

Assembly Source File | 1991-09-10 | 16KB | 626 lines

;---------------------------Module-Header-------------------------------; ; Module Name: SQRT.ASM ; ; Contains FIXED and ULONG square root ; ; Created: Sun 30-Aug-1987 19:28:30 ; ; NOTE: the 64-bit sqrt code is 386 specific! ; ;-----------------------------------------------------------------------; ; ; (C) Copyright Microsoft Corp. 1987-1991. All rights reserved. ; ; You have a royalty-free right to use, modify, reproduce and ; distribute the Sample Files (and/or any modified version) in ; any way you find useful, provided that you agree that ; Microsoft has no warranty obligations or liability for any ; Sample Application Files which are modified. ; ?WIN = 0 ?PLM = 0 ?NODATA = 1 .xlist include cmacros.inc .list UQUAD struc uq0 dw ? uq1 dw ? uq2 dw ? uq3 dw ? UQUAD ends PTL struc ptl_x dd ? ptl_y dd ? PTL ends POINTFX struc x dd ? y dd ? z dd ? w dd ? POINTFX ends MAXINT equ 7FFFh MININT equ 8000h ; The following two equates are just used as shorthand ; for the "word ptr" and "byte ptr" overrides. wptr equ word ptr bptr equ byte ptr ; The following structure should be used to access high and low ; words of a DWORD. This means that "word ptr foo[2]" -> "foo.hi". LONG struc lo dw ? hi dw ? LONG ends FARPOINTER struc off dw ? sel dw ? FARPOINTER ends assert macro a,b,c,d endm EAXtoDXAX macro shld edx,eax,16 ; move HIWORD(eax) to dx endm DXAXtoEAX macro ror eax,16 ; xchg HIWORD(eax) and LOWORD(eax) shrd eax,edx,16 ; move LOWORD(edx) to HIWORD(eax) endm .386 sBegin Code assumes cs,Code assumes es,nothing assumes ds,nothing ;---------------------------Private-Macro-------------------------------; ; set_ov ; ; Sets the overflow flag ; ; Entry: ; reg = word register to use for scratch ; Returns: ; OF set ; Error Returns: ; none ; Registers Destroyed: ; reg ;-----------------------------------------------------------------------; set_ov macro reg mov reg,8000h dec reg endm ;---------------------------Public-Routine------------------------------; ; square_root ; ; Computes the Fixed square root of a Fixed. This algorithm ; comes from Knuth D.E; Metafont:The Program (Addison-Weseley, 1986) ; Part 8:Algebraic and Transcendental Functions. ; ; Notation used below: ; ; Entry: ; DX:AX = number to square root ; Returns: ; DX:AX = square root of input ; Error Returns: ; OF = 1 ; Registers Destroyed: ; BX,CX ; Registers Preserved: ; DS,ES,SI,DI ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc fxsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<> ParmD fx cBegin mov ax, fx.lo mov dx, fx.hi cCall square_root mov dh,dl ; DX:AX *= sqrt(1000h) mov dl,ah mov ah,al xor al,al cEnd cProc ulsqrt,<PUBLIC,NEAR,NODATA,NONWIN>,<> ParmD ul cBegin mov ax, ul.lo mov dx, ul.hi cCall square_root cEnd ; ; compute the square root of EDX:EAX ; ; retult returned in EAX and DX:AX ; ; We dont have a 64-bit square_root function ; so approximate it by dividing by 4 until we can use our ; 32-bit function ; cProc sqrt64,<PUBLIC,NEAR,NODATA,NONWIN>,<> cBegin xor cx,cx test_less_than_32: or edx,edx ; is it less than 32-bit? jz less_than_32 inc cx shrd eax,edx,2 ; divide by 4 shr edx,2 jmp short test_less_than_32 less_than_32: ;; ;; EDX:EAX is now less than 32 bits, CX contains count of ;; divisions by 4 we had to do to get here. call square_root ;; and then re-normalize ;; push cx EAXtoDXAX call square_root pop cx jcxz sqrt64_exit @@: shl ax,1 rcl dx,1 loop @b sqrt64_exit: DXAXtoEAX cEnd cProc square_root,<PUBLIC,NEAR,NODATA,NONWIN>,<SI,DI> ;; ParmD fx cBegin ;; mov ax, fx.lo ;; mov dx, fx.hi ; check for < 1 since this algorithm returns 0000:0001 or dx,dx jnz non_zero_arg cmp ax,1 ja non_zero_arg jmp square_root_exit negative_arg: ; can't have number negative xor ax,ax cwd set_ov bx jmp square_root_exit non_zero_arg: cCall ulNormalize ;;;;;;;;jcxz negative_arg jz exit_relay shr cx,1 jc add_shifts_only shr dx,1 rcr ax,1 dec cx add_shifts_only: ;; ;; use 23 for a FIXED square_root, and 15 for a ULONG square_root! ;; if 0 mov ch,23 ; FIXED square_root else mov ch,15 ; ULONG square_root endif sub ch,cl mov cl,8 sub ch,cl jg more_than_8 ; CL = Min(count,8) add cl,ch xor ch,ch ; CH = Max(count-8,0) more_than_8: xchg ax,si ; DI:SI = x mov di,dx xor ax,ax ; AX = y = lower bound = 0 mov bx,2 ; BX = q = estimate of root = 2 add si,si ; intiialize y = top bit of x adc di,di adc ax,ax first_8_loop: add si,si ; move 2 bits from top adc di,di ; of x to bottom of y adc ax,ax assert NC add si,si adc di,di adc ax,ax assert NC sub ax,bx ; AX = y - q jle y_neg_or_zero_8 ;!!!CR loops can be cleaned up some add bx,bx ; BX = 2q sub ax,bx ; AX = y - 3q jg y_greater_q_8 add ax,bx ; AX = y - q dec cl jnz first_8_loop jcxz first_8_was_enough jmp short done_with_1st_8 y_greater_q_8: add bx,2 ; BX = 2q + 2 (or 2q if jg not taken) dec cl jnz first_8_loop or ch,ch jnz done_with_1st_8 first_8_was_enough: xchg ax,bx ; DX:AX = q xor dx,dx shr ax,1 ; root = q >> 1 or ax,ax ; clear overflow flag exit_relay: jmp square_root_exit y_neg_or_zero_8: ; come here if y <= 0 dec bx add bx,bx ; BX = 2q - 2 add ax,bx ; AX = y + q - 2 dec cl jnz first_8_loop jcxz first_8_was_enough ;si is now empty, di contains all signifigant bits done_with_1st_8: mov cl,4 sub ch,cl jg second_4_loop ; CL = Min(count,4) add cl,ch xor ch,ch ; CH = Max(count-8,0) second_4_loop: ; move 2 bits from top add di,di ; of x to bottom of y adc ax,ax assert NC add di,di adc ax,ax assert NC sub ax,bx ; AX = y - q jle y_neg_or_zero_2nd_4 add bx,bx ; BX = 2q ;!!!CR use compare and jump rather than sub, branch, and restore sub ax,bx ; AX = y - 3q jg y_greater_q_2nd_4 add ax,bx ; AX = y - q dec cl jnz second_4_loop jcxz second_4_was_enough jmp short done_with_2nd_4 y_greater_q_2nd_4: add bx,2 ; BX = 2q + 2 (or 2q if jg not taken) dec cl jnz second_4_loop or ch,ch jnz done_with_2nd_4 second_4_was_enough: xchg ax,bx ; DX:AX = q xor dx,dx shr ax,1 ; root = q >> 1 or ax,ax ; clear overflow flag jmp square_root_exit y_neg_or_zero_2nd_4: ; come here if y <= 0 dec bx add bx,bx ; BX = 2q - 2 add ax,bx ; AX = y + q - 2 dec cl jnz second_4_loop jcxz second_4_was_enough done_with_2nd_4: xor dx,dx ; DX:AX = y SI:BX = q mov cl,4 sub ch,cl jg third_4_loop ; CL = Min(count,4) add cl,ch xor ch,ch ; CH = Max(count-13,0) third_4_loop: ;!!!CR Add comment noting that 32 bit math is being used now ; move 2 bits from top add di,di ; of x to bottom of y adc ax,ax adc dx,dx assert NC add di,di adc ax,ax adc dx,dx assert NC sub ax,bx ; DX:AX = y - q sbb dx,si js y_negative_3rd_4 jz y_might_be_zero_3rd_4 sorry_y_not_zero_3rd_4: add bx,bx adc si,si ; SI:BX = 2q sub ax,bx ; DX:AX = y - 3q sbb dx,si jg y_greater_q_3rd_4 jl y_less_or_equal_q_3rd_4 or ax,ax jnz y_greater_q_3rd_4 y_less_or_equal_q_3rd_4: add ax,bx ; DX:AX = y - q adc dx,si dec cl jnz third_4_loop jcxz third_4_was_enough jmp short done_with_3rd_4 y_greater_q_3rd_4: add bx,2 adc si,0 dec cl jnz third_4_loop or ch,ch jnz done_with_3rd_4 third_4_was_enough: xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp square_root_exit ;;; short! y_might_be_zero_3rd_4: or ax,ax jnz sorry_y_not_zero_3rd_4 y_negative_3rd_4: ; come here if y <= 0 sub bx,1 sbb si,0 add bx,bx ; DL:BX = 2q - 2 adc si,si add ax,bx adc dx,si ; DH:AX = y + q - 2 dec cl jnz third_4_loop jcxz third_4_was_enough done_with_3rd_4: xchg ch,cl last_7_loop: add ax,ax adc dx,dx add ax,ax adc dx,dx sub ax,bx ; DX:AX = y - q sbb dx,si js y_negative_last_7 jz y_might_be_zero_last_7 ;!!!CR Clean up the exit so that a single exit point is used. sorry_y_not_zero_last_7: add bx,bx adc si,si ; SI:BX = 2q sub ax,bx ; DX:AX = y - 3q sbb dx,si jg y_greater_q_last_7 jl y_less_or_equal_q_last_7 or ax,ax jnz y_greater_q_last_7 y_less_or_equal_q_last_7: add ax,bx ; DX:AX = y - q adc dx,si loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp short square_root_exit y_greater_q_last_7: add bx,2 adc si,0 loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag jmp short square_root_exit y_might_be_zero_last_7: or ax,ax jnz sorry_y_not_zero_last_7 y_negative_last_7: ; come here if y <= 0 sub bx,1 sbb si,0 add bx,bx ; DL:BX = 2q - 2 adc si,si add ax,bx adc dx,si ; DH:AX = y + q - 2 loop last_7_loop xchg ax,bx ; DX:AX = q mov dx,si shr dx,1 ; root = q >> 1 rcr ax,1 or ax,ax ; clear overflow flag square_root_exit: cEnd ;---------------------------Public-Routine------------------------------; ; ulNormalize ; ; Normalizes a ULONG so that the highest order bit is 1. Returns the ; number of shifts done. Also returns ZF=1 if the ULONG was zero. ; ; Entry: ; DX:AX = ULONG ; Returns: ; DX:AX = normalized ULONG ; CX = shift count ; ZF = 1 if the ULONG is zero, 0 otherwise ; Registers Destroyed: ; none ;-----------------------------------------------------------------------; assumes ds,nothing assumes es,nothing cProc ulNormalize,<PUBLIC,NEAR> cBegin ; shift by words xor cx,cx or dx,dx js ulNormalize_exit jnz top_word_ok xchg ax,dx or dx,dx jz ulNormalize_exit ; the zero exit mov cl,16 js ulNormalize_exit top_word_ok: ; shift by bytes or dh,dh jnz top_byte_ok xchg dh,dl xchg dl,ah xchg ah,al add cl,8 or dh,dh js ulNormalize_exit top_byte_ok: ; do the rest by bits inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx js ulNormalize_exit inc cx add ax,ax adc dx,dx ulNormalize_exit: cEnd sEnd Code end